# -*- coding: utf-8 -*-
'''
糗百笑话 简单爬取
'''
from urllib import request
import re
import time

# Page number of the "hot" listing to fetch; it is appended to the listing URL.
page = 2
url = 'http://www.qiushibaike.com/hot/page/' + str(page)
# Explicit User-Agent sent with the request in place of urllib's default one.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

# Upper bound (seconds) on each blocking network operation, so that a stalled
# server cannot hang the script forever.
REQUEST_TIMEOUT = 10
# Timestamp layout shared by the start-up banner and the per-page summary.
TIME_FORMAT = "%Y/%m/%d %H:%M:%S"
# Both patterns are compiled once up front.  re.S lets '.' match newlines so
# that a block or a span spread over several lines is still captured.  re.M
# is not needed: neither pattern uses the '^' / '$' anchors it affects.
CONTENT_DIV_RE = re.compile(r'<div.class="content">.*?</div>', re.S)
SPAN_RE = re.compile(r'<span>(.*?)</span>', re.S)

print('******************爬虫一号:最新的糗百段子 ****************')
print('启动时间:', time.strftime(TIME_FORMAT, time.localtime()))
try:
    req = request.Request(url, headers=headers)
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding fails.
    with request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
        # errors='replace' keeps a single malformed byte sequence from
        # aborting the whole run with UnicodeDecodeError.
        content = response.read().decode('utf-8', errors='replace')
except request.URLError as e:
    # HTTPError (a URLError subclass) carries both an HTTP status code and a
    # reason; a plain URLError only carries a reason.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
except OSError as e:
    # A timeout or connection failure while reading the body is raised as an
    # OSError that is not a URLError; report it instead of crashing.
    print(e)
else:
    # Parsing only runs after a successful download, so the handlers above
    # guard nothing but the network I/O.
    items = CONTENT_DIV_RE.findall(content)
    for item in items:
        # Print the inner text of every <span> found in this content block.
        for value in SPAN_RE.findall(item):
            print(value)
    # Summary line: page number, number of content blocks matched, timestamp.
    print('第', page, '页', '  本页', len(items), '条', '         ',
          time.strftime(TIME_FORMAT, time.localtime()))
